// Trade-Policy Dynamics: Evidence from 60 Years of U.S.-China Trade
// Alessandria,  Khan, Khederlarian, Ruhl, and Steinberg

// inputs:	spread_hs6.dta
//			SITC2-HS96.dta
//			SITC2-HS92.dta
//			spread_hs8.dta
//			hs_mfa_phase_20111208.dta
//			Feenstra/China_HK/china_hk88.dta ... china_hk00.dta
//			uncodes_uscodes_wbcodes.dta
//			Feenstra/WTF/wtf62.dta ... wtf00.dta

// outputs:	imports_tsusa_74-88.dta
//			spread_4dsitc2.dta
//			mfa_4dsitc2.dta

set varabbrev off 
clear all 

global dir_int "../20 Intermediate files/"
global dir_raw "../01 Raw data/"

timer on 1 
// Create the tsusa data 1974--88
{
	use "${dir_raw}Feenstra/imp74/imp74.dta", clear

	forvalues x = 75/88 {
		append using "${dir_raw}Feenstra/imp`x'/imp`x'.dta"
	}
	rename yr year
	rename duty duties
	gen country = name
	gen tar_unwgt = duties/cusvalue
	replace tar_unwgt = 0 if tar_unwgt<0
	bysort sitc2 year country: egen count_jst = nvals(tsusa)
	collapse (mean) tar_unwgt (sum) quantity cusvalue cifvalue duties (first) sitc2 count_jst units , by(tsusa year country)
	replace year = year+1900
	save "${dir_int}imports_tsusa_74-88.dta", replace
}

// Add SITC2 codes to HS8 and HS6 spreads and generate sitc2 (2 and 5 digit) spreads
{
	use "${dir_int}spread_hs6.dta", clear
	capture drop sitc2
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	save "${dir_int}spread_hs6_apx.dta", replace

	use "${dir_int}spread_hs8.dta", clear
	capture drop sitc2
	capture gen hs6 = substr(hs8,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	save "${dir_int}spread_hs8_apx.dta", replace
	gen sitc2_4d = substr(sitc2,1,4)
	foreach x in nntr ntr s {
		bysort year sitc2_4d: egen `x'_med = median(`x')
		bysort year sitc2_4d: egen `x'_mean = mean(`x')
	}
	drop hs* nntr ntr s sitc2
	duplicates drop
	drop if sitc2_4d==""
	save "${dir_int}spread_4dsitc2.dta", replace
}

// Generate MFA File
{
	use "${dir_raw}hs_mfa_phase_20111208.dta", clear
	tostring hs, replace
	gen hs6 = substr(hs,1,6)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	drop if sitc2==""
	gen sitc2_4d = substr(sitc2,1,4)
	collapse (mean) phase, by(sitc2_4d)
	replace phase = round(phase)
	save "${dir_int}mfa_4dsitc2.dta", replace
}

// Generate file with top 50 importers
{
	use "${dir_raw}Feenstra/WTF/wtf00.dta", clear
	replace icode = "532800" if icode=="532760"
	replace ecode = "532800" if ecode=="532760"
	rename sitc4 sitc2_4d
	replace importer=lower(importer)
	replace exporter=lower(exporter)
	drop if real(sitc2_4d)==.
	drop if strpos(importer," nes")>0 | strpos(exporter," nes")>0
	drop if strpos(importer," ns")>0 | strpos(exporter," ns")>0
	drop if importer=="world" | exporter=="world" | importer=="taiwan" | exporter=="taiwan" ///
		| strpos(importer,"congo")>0 | strpos(exporter,"congo")>0 | strpos(importer,"yugo")>0 | strpos(exporter,"yugo")>0
	replace icode="688100" if icode=="586430" | icode=="581120" | icode=="468600" ///
		| icode=="588040" | icode=="467950" | icode=="467620" | icode=="584980" ///
		| icode=="584400" | icode=="584280" | icode=="464170" | icode=="463980" ///
		| icode=="582330" | icode=="460510" | icode=="462680" | icode=="460310"
	replace ecode="688100" if ecode=="586430" | ecode=="581120" | ecode=="468600" ///
		| ecode=="588040" | ecode=="467950" | ecode=="467620" | ecode=="584980" ///
		| ecode=="584400" | ecode=="584280" | ecode=="464170" | ecode=="463980" ///
		| ecode=="582330" | ecode=="460510" | ecode=="462680" | ecode=="460310"
	replace importer="ussr" if icode=="688100"
	replace exporter="ussr" if ecode=="688100"
	drop if icode==ecode
	collapse (sum) value (first) importer exporter, by(icode ecode sitc2_4d year)	
	gen uncode = ecode
	merge m:1 uncode using "${dir_int}uncodes_unique_wbcodes.dta", keepusing(wbcode)
	replace wbcode = "USA" if exporter=="usa"
	keep if _merge==3 | wbcode=="SOV" | wbcode=="USA"
	drop _merge uncode
	rename wbcode iso3
	replace iso3 = "E12" if iso3=="BLX" | iso3=="DNK" | iso3=="DEU" ///
		| iso3=="IRL" | iso3=="GRC" | iso3=="ESP" | iso3=="FRA" | iso3=="ITA" ///
		| iso3=="NLD" | iso3=="PRT" | iso3=="GBR"
		drop if iso3==""
	collapse (sum) value (first) exporter ecode, by(iso3 year)		
	egen temp1 = rank(value), field
	bysort iso3: egen m_rank2000_j = mean(temp1)
	drop if m_rank2000_j>50
	keep m_rank2000_j ecode iso3 exporter
	rename ecode code
	rename exporter country
	duplicates drop
	save "${dir_int}top50_exporters2000.dta", replace
}

// Append HK adjusted values together
{
	use "${dir_raw}Feenstra/China_HK/china_hk00.dta", clear
	forval y = 88/99 {
		append using "${dir_raw}Feenstra/China_HK/china_hk`y'.dta"
	}
	rename sitc4 sitc2_4d
	gen j_iso3 = "CHN" if exporter=="China"
	replace j_iso3 = "HKG" if exporter=="China HK SAR"
	drop if importer=="world" | exporter=="world" | importer=="taiwan" | exporter=="taiwan" ///
		| strpos(importer,"congo")>0 | strpos(exporter,"congo")>0 | strpos(importer,"yugo")>0 | strpos(exporter,"yugo")>0
	replace icode="688100" if icode=="586430" | icode=="581120" | icode=="468600" ///
		| icode=="588040" | icode=="467950" | icode=="467620" | icode=="584980" ///
		| icode=="584400" | icode=="584280" | icode=="464170" | icode=="463980" ///
		| icode=="582330" | icode=="460510" | icode=="462680" | icode=="460310"
	replace importer="ussr" if icode=="688100"
	gen code = icode
	merge m:1 code using "${dir_int}top50_exporters2000.dta", keepusing(iso3 m_rank2000_j)
	drop if _merge==2
	replace importer="row" if _merge==1
	replace iso3="ROW" if _merge==1
	replace icode="000000" if _merge==1
	drop _merge code
	rename iso3 i_iso3
	collapse (sum) value_adj (first) importer exporter icode ecode m_rank2000_j, by(sitc2_4d year i_iso3 j_iso3)
	rename value_adj vadj_ijst
	save "${dir_int}HK_adjustment.dta",replace
}

// Unique country codes 
{
	use "${dir_raw}uncodes_uscodes_wbcodes.dta", clear
	keep un* wbcode
	duplicates drop
	tostring uncode, replace
	bysort uncode: egen temp1 = nvals(wbcode)
	tabdisp wbcode if temp1==2, cellv(un*)
	drop if wbcode!="ZAF" & uncode=="117100"
	drop if wbcode!="KNA" & uncode=="356580"
	drop if wbcode!="KIR" & uncode=="722960"
	drop if wbcode!="GNB" & uncode=="166240"
	drop if wbcode!="ANT" & uncode=="355320"
	drop if wbcode!="FRA" & uncode=="532500"
	drop if wbcode!="ISR" & uncode=="413760"
	drop if wbcode!="MDG" & uncode=="164500"
	drop if wbcode!="JAM" & uncode=="353880"
	replace wbcode="BLX" if wbcode=="BEL" | wbcode=="LUX"
	collapse (first) uncountry wbcode, by(uncode)
	save "${dir_int}uncodes_unique.dta", replace
}


////////////////////////////////////////////////////////////////////////////////
// Generate 5 digit SITC rev 2 dataset 

// Appending Feenstra's files - country wise
// 1962-2000
{
	use "${dir_raw}Feenstra/WTF/wtf00.dta", clear
	forvalues x = 62/99 {
		append using "${dir_raw}Feenstra/WTF/wtf`x'.dta"
	}
	replace icode = "532800" if icode=="532760"
	replace ecode = "532800" if ecode=="532760"
	rename sitc4 sitc2_4d
	replace importer=lower(importer)
	replace exporter=lower(exporter)
	drop if real(sitc2_4d)==.
	drop if strpos(importer," nes")>0 | strpos(exporter," nes")>0
	drop if importer=="world" | exporter=="world"
	gen p_unwgt_ijst = value/quantity
	replace icode="688100" if icode=="586430" | icode=="581120" | icode=="468600" ///
		| icode=="588040" | icode=="467950" | icode=="467620" | icode=="584980" ///
		| icode=="584400" | icode=="584280" | icode=="464170" | icode=="463980" ///
		| icode=="582330" | icode=="460510" | icode=="462680" | icode=="460310"
	replace ecode="688100" if ecode=="586430" | ecode=="581120" | ecode=="468600" ///
		| ecode=="588040" | ecode=="467950" | ecode=="467620" | ecode=="584980" ///
		| ecode=="584400" | ecode=="584280" | ecode=="464170" | ecode=="463980" ///
		| ecode=="582330" | ecode=="460510" | ecode=="462680" | ecode=="460310"
	replace importer="ussr" if icode=="688100"
	replace exporter="ussr" if ecode=="688100"	
	drop if icode==ecode

	gen uncode = ecode
	merge m:1 uncode using "${dir_int}uncodes_unique.dta", keepusing(wbcode)
	replace wbcode = "USA" if exporter=="usa"
	keep if _merge==3 | wbcode=="SOV" | wbcode=="USA"
	drop _merge uncode
	rename wbcode j_iso3

	gen uncode = icode
	merge m:1 uncode using "${dir_int}uncodes_unique.dta", keepusing(wbcode)
	replace wbcode = "USA" if importer=="usa"
	keep if _merge==3 | wbcode=="SOV" | wbcode=="USA"
	drop _merge uncode
	rename wbcode i_iso3

	replace i_iso3 = "E12" if i_iso3=="BLX" | i_iso3=="DNK" | i_iso3=="DEU" ///
		| i_iso3=="IRL" | i_iso3=="GRC" | i_iso3=="ESP" | i_iso3=="FRA" | i_iso3=="ITA" ///
		| i_iso3=="NLD" | i_iso3=="PRT" | i_iso3=="GBR"
	replace j_iso3 = "E12" if j_iso3=="BLX" | j_iso3=="DNK" | j_iso3=="DEU" ///
		| j_iso3=="IRL" | j_iso3=="GRC" | j_iso3=="ESP" | j_iso3=="FRA" | j_iso3=="ITA" ///
		| j_iso3=="NLD" | j_iso3=="PRT" | j_iso3=="GBR"

	gen iso3 = i_iso3
	merge m:1 iso3 using "${dir_int}top50_exporters2000.dta", keepusing(code m_rank2000_j)
	drop if _merge==2
	replace importer="row" if _merge==1
	replace i_iso3="ROW" if _merge==1
	replace icode="000000" if _merge==1
	drop _merge code iso3

	gen iso3 = j_iso3
	merge m:1 iso3 using "${dir_int}top50_exporters2000.dta", keepusing(code m_rank2000_j)
	drop if _merge==2
	replace exporter="row" if _merge==1
	replace j_iso3="ROW" if _merge==1
	replace ecode="000000" if _merge==1
	drop _merge code iso3

	drop if i_iso3==j_iso3
	collapse (mean) p_unwgt_ijst (sum) value quantity (first) importer exporter icode ecode m_rank2000_j, by(sitc2_4d year i_iso3 j_iso3)

	rename value cusvalue
	rename ecode code_exp
	rename icode code_imp

	save "${dir_int}trade_4dsitc2_62-18.dta", replace
}

// These data are from CEPII: 2001-2008
{
	// Convert the csv to dta
	forvalues x = 2001/2008{
		import delimited "${dir_raw}BACI/BACI_HS96_Y`x'_V202102.csv", clear
		gen country_code = i
		merge m:1 country_code using "${dir_raw}country_codes.dta", keepusing(country_name_full iso_3digit_alpha) 
		drop if _merge==2
		drop _merge
		rename country_name_full exporter
		rename iso_3digit_alpha j_iso3
		drop country_code

		gen country_code = j
		merge m:1 country_code using "${dir_raw}country_codes.dta", keepusing(country_name_full iso_3digit_alpha) 
		drop if _merge==2
		drop _merge
		rename country_name_full importer
		rename iso_3digit_alpha i_iso3
		drop country_code

		rename i code_exp
		rename j code_imp

		label var i_iso3 "importer ISO code"
		label var j_iso3 "exporter ISO code"

		tostring k, replace
		replace k = "0"+k if length(k)==5
		save "${dir_int}baci_`x'.dta", replace	
	}
	
	use "${dir_int}baci_2001.dta", clear
	forvalues x = 2002/2008 {
		append using "${dir_int}baci_`x'.dta"
	}

	rename k hs6
	rename t year
	replace importer=lower(importer)
	replace exporter=lower(exporter)
	merge m:1 hs6 using "${dir_raw}SITC2-HS96.dta", gen(merge1)
	drop if merge1==2
	rename sitc2 sitc2_orig
	merge m:1 hs6 using "${dir_raw}SITC2-HS92.dta", gen(merge2)
	drop if merge2==2
	replace sitc2_orig = sitc2 if sitc2_orig==""
	drop sitc2 merge*
	rename sitc2_orig sitc2 
	gen sitc2_4d = substr(sitc2,1,4)
	drop if sitc2_4d==""
	drop if real(sitc2_4d)==.
	
	// New from Kim
	destring(q), ignore("NA") replace
	
	gen p_unwgt_ijst = v/q

	replace i_iso3 = "BLX" if i_iso3=="BEL" | i_iso3=="LUX"
	replace j_iso3 = "BLX" if j_iso3=="BEL" | j_iso3=="LUX"
	replace i_iso3 = "ROM" if i_iso3=="ROU"
	replace j_iso3 = "ROM" if j_iso3=="ROU"

	replace i_iso3="SOV" if i_iso3=="RUS" | i_iso3=="BLR" | i_iso3=="UZB" | i_iso3=="UKR" | i_iso3=="TKM" | i_iso3=="TJK" | i_iso3=="MDA" | ///
	 i_iso3=="LTU" | i_iso3=="LVA" | i_iso3=="KGZ" | i_iso3=="KAZ" | i_iso3=="EST" | i_iso3=="ARM" | i_iso3=="GEO" | i_iso3=="AZE"
	 replace j_iso3="SOV" if j_iso3=="RUS" | j_iso3=="BLR" | j_iso3=="UZB" | j_iso3=="UKR" | j_iso3=="TKM" | j_iso3=="TJK" | j_iso3=="MDA" | ///
	 j_iso3=="LTU" | j_iso3=="LVA" | j_iso3=="KGZ" | j_iso3=="KAZ" | j_iso3=="EST" | j_iso3=="ARM" | j_iso3=="GEO" | j_iso3=="AZE"
	 
	replace i_iso3 = "E12" if i_iso3=="BLX" | i_iso3=="DNK" | i_iso3=="DEU" ///
		| i_iso3=="IRL" | i_iso3=="GRC" | i_iso3=="ESP" | i_iso3=="FRA" | i_iso3=="ITA" ///
		| i_iso3=="NLD" | i_iso3=="PRT" | i_iso3=="GBR"
	replace j_iso3 = "E12" if j_iso3=="BLX" | j_iso3=="DNK" | j_iso3=="DEU" ///
		| j_iso3=="IRL" | j_iso3=="GRC" | j_iso3=="ESP" | j_iso3=="FRA" | j_iso3=="ITA" ///
		| j_iso3=="NLD" | j_iso3=="PRT" | j_iso3=="GBR" 
	 
	gen iso3 = i_iso3
	merge m:1 iso3 using "${dir_int}top50_exporters2000.dta", keepusing(m_rank2000_j)
	drop if _merge==2
	replace importer="row" if _merge==1
	replace i_iso3="ROW" if _merge==1
	drop _merge iso3
	replace code_imp=000000 if i_iso3=="ROW"

	gen iso3 = j_iso3
	merge m:1 iso3 using "${dir_int}top50_exporters2000.dta", keepusing(m_rank2000_j)
	drop if _merge==2
	replace exporter="row" if _merge==1
	replace j_iso3="ROW" if _merge==1
	drop _merge iso3
	replace code_exp=000000 if j_iso3=="ROW"

	drop if i_iso3==j_iso3

	collapse (mean) p_unwgt_ijst (sum) v q (first) importer exporter m_rank2000_j, by(sitc2_4d year i_iso3 j_iso3)
	rename v cusvalue
	rename q quantity

	append using "${dir_int}trade_4dsitc2_62-18.dta"
	save "${dir_int}trade_4dsitc2_62-18.dta", replace
}


gen yr = year
replace year = 2001
merge m:1 year sitc2_4d using "${dir_int}spread_4dsitc2.dta"
drop if year!=2001
drop if _merge==2
drop _merge year
rename yr year

rename cusvalue v_ijst
rename quantity qty_ijst // reporting started in 1989
drop if sitc2_4d==""

merge m:1 sitc2_4d using "${dir_int}mfa_4dsitc2.dta"
drop if _merge==2
drop _merge
replace phase = 0 if phase==.

bysort sitc2_4d: egen pre84_s = mean(v_ijst/(year<1984))
tab sitc2_4d if pre84_s==.
replace sitc2_4d = "0410" if substr(sitc2_4d,1,3)=="041"
replace sitc2_4d = "2220" if substr(sitc2_4d,1,3)=="222"
replace sitc2_4d = "2810" if substr(sitc2_4d,1,3)=="281"
replace sitc2_4d = "5820" if substr(sitc2_4d,1,3)=="582"
replace sitc2_4d = "5830" if substr(sitc2_4d,1,3)=="583"
replace sitc2_4d = "5840" if substr(sitc2_4d,1,3)=="584"
replace sitc2_4d = "7430" if substr(sitc2_4d,1,3)=="743"
replace sitc2_4d = "7520" if substr(sitc2_4d,1,3)=="752"
replace sitc2_4d = "7920" if substr(sitc2_4d,1,3)=="792"

foreach v of varlist *_me* {
	bysort sitc2_4d: egen temp1 = median(`v')
	replace `v' = temp1
	drop temp*
}

collapse (sum) v_ijst qty_ijst (first) importer exporter m_rank2000_j code* phase *_me* (mean) p_unwgt_ijst, by(i_iso3 j_iso3 year sitc2_4d)


label variable v_ijst "Imports (FOB) from exporter j ind s year t"
label variable qty_ijst "Quantities (summed across products)"
label variable nntr_med "Non-NTR Tariff rates (median across products)"
label variable nntr_mean "Non-NTR Tariff rates (mean across products)"
label variable ntr_med "NTR Tariff rates (median across products)"
label variable ntr_mean "NTR Tariff rates (mean across products)"
label variable s_med "Spreads 2001 (median across products)"
label variable s_mean "Spreads 2001 (mean across products)"
label variable phase "MFA Phase number (0 for non-MFA goods)"

save "${dir_int}trade_4dsitc2_62-18.dta", replace

timer off 1
timer list